Load packages and set parameters
if (!require("librarian")){
install.packages("librarian")
library(librarian)
}
librarian::shelf(assertthat, BiocManager, tidyverse, gridExtra, here, mapview,
prioritizr, prioritizrdata, raster, remotes, rgeos, rgdal, scales, sf, sp,
units)
if (!require("lpsymphony")){
BiocManager::install("lpsymphony")
library(lpsymphony)
}
options(scipen = 999)
redo <- FALSE
Data setup
dir_data <- here("data/prioritizr")
pu_shp <- file.path(dir_data, "pu.shp")
pu_url <- "https://github.com/prioritizr/massey-workshop/raw/main/data.zip"
pu_zip <- file.path(dir_data, basename(pu_url))
vegetation_tif <- file.path(dir_data, "vegetation.tif")
dir.create(dir_data, showWarnings = F, recursive = T)
if (!file.exists(pu_shp) | redo){
download.file(pu_url, pu_zip)
unzip(pu_zip, exdir = dir_data)
dir_unzip <- file.path(dir_data, "data")
files_unzip <- list.files(dir_unzip, full.names = T)
file.rename(
files_unzip,
files_unzip %>% str_replace("prioritizr/data", "prioritizr"))
unlink(c(pu_zip, dir_unzip), recursive = T)
}
Data import
# import planning unit data
pu_data <- as(read_sf(pu_shp), "Spatial")
# format columns in planning unit data
pu_data$locked_in <- as.logical(pu_data$locked_in)
pu_data$locked_out <- as.logical(pu_data$locked_out)
# import vegetation data
veg_data <- stack(vegetation_tif)
# print a short summary of the data
print(pu_data)
## class : SpatialPolygonsDataFrame
## features : 516
## extent : 348703.2, 611932.4, 5167775, 5344516 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## variables : 5
## names : id, cost, status, locked_in, locked_out
## min values : 557, 3.59717531470679, 0, 0, 0
## max values : 1130, 47.238336402701, 2, 1, 1
# plot the planning unit data
plot(pu_data)
# plot an interactive map of the planning unit data
mapview(pu_data)
# print the structure of object
str(pu_data, max.level = 2)
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 516 obs. of 5 variables:
## ..@ polygons :List of 516
## ..@ plotOrder : int [1:516] 69 104 1 122 157 190 4 221 17 140 ...
## ..@ bbox : num [1:2, 1:2] 348703 5167775 611932 5344516
## .. ..- attr(*, "dimnames")=List of 2
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
# print the class of the object
class(pu_data)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
# print the slots of the object
slotNames(pu_data)
## [1] "data" "polygons" "plotOrder" "bbox" "proj4string"
# print the coordinate reference system
print(pu_data@proj4string)
## CRS arguments:
## +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
# print number of planning units (geometries) in the data
nrow(pu_data)
## [1] 516
# print the first six rows in the data
head(pu_data@data)
## id cost status locked_in locked_out
## 1 557 29.74225 0 FALSE FALSE
## 2 558 29.87703 0 FALSE FALSE
## 3 574 28.60687 0 FALSE FALSE
## 4 575 30.83416 0 FALSE FALSE
## 5 576 38.75511 0 FALSE FALSE
## 6 577 38.11618 2 TRUE FALSE
# print the first six values in the cost column of the attribute data
head(pu_data$cost)
## [1] 29.74225 29.87703 28.60687 30.83416 38.75511 38.11618
# print the highest cost value
max(pu_data$cost)
## [1] 47.23834
# print the smallest cost value
min(pu_data$cost)
## [1] 3.597175
# print average cost value
mean(pu_data$cost)
## [1] 26.87393
# plot a map of the planning unit cost data
spplot(pu_data, "cost")
# plot an interactive map of the planning unit cost data
mapview(pu_data, zcol = "cost")
Question: How many planning units are in the planning unit data?
Answer: There are 516 planning units in the unit data.
Question: What is the highest cost value?
Answer: The highest cost value is $ 47.24 in Australian dollars.
Question: Is there a spatial pattern in the planning unit cost values (hint: use plot to make a map)?
Answer: Yes, the planning units with the highest cost tend to be in the northern most central part of Tasmania, followed by the western half of the region.
# print a short summary of the data
print(veg_data)
## class : RasterStack
## dimensions : 164, 326, 53464, 32 (nrow, ncol, ncell, nlayers)
## resolution : 967, 1020 (x, y)
## extent : 298636.7, 613878.7, 5167756, 5335036 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## names : vegetation.1, vegetation.2, vegetation.3, vegetation.4, vegetation.5, vegetation.6, vegetation.7, vegetation.8, vegetation.9, vegetation.10, vegetation.11, vegetation.12, vegetation.13, vegetation.14, vegetation.15, ...
## min values : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## max values : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
# plot a map of the 20th vegetation class
plot(veg_data[[20]])
# plot an interactive map of the 20th vegetation class
mapview(veg_data[[20]])
# print number of rows in the data
nrow(veg_data)
## [1] 164
# print number of columns in the data
ncol(veg_data)
## [1] 326
# print number of cells in the data
ncell(veg_data)
## [1] 53464
# print number of layers in the data
nlayers(veg_data)
## [1] 32
# print resolution on the x-axis
xres(veg_data)
## [1] 967
# print resolution on the y-axis
yres(veg_data)
## [1] 1020
# print spatial extent of the grid, i.e. coordinates for corners
extent(veg_data)
## class : Extent
## xmin : 298636.7
## xmax : 613878.7
## ymin : 5167756
## ymax : 5335036
# print the coordinate reference system
print(veg_data@crs)
## CRS arguments:
## +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
# print a summary of the first layer in the stack
print(veg_data[[1]])
## class : RasterLayer
## band : 1 (of 32 bands)
## dimensions : 164, 326, 53464 (nrow, ncol, ncell)
## resolution : 967, 1020 (x, y)
## extent : 298636.7, 613878.7, 5167756, 5335036 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## source : vegetation.tif
## names : vegetation.1
## values : 0, 1 (min, max)
# print the value in the 800th cell in the first layer of the stack
print(veg_data[[1]][800])
##
## 0
# print the value of the cell located in the 30th row and the 60th column of
# the first layer
print(veg_data[[1]][30, 60])
##
## 0
# calculate the sum of all the cell values in the first layer
cellStats(veg_data[[1]], "sum")
## [1] 17
# calculate the maximum value of all the cell values in the first layer
cellStats(veg_data[[1]], "max")
## [1] 1
# calculate the minimum value of all the cell values in the first layer
cellStats(veg_data[[1]], "min")
## [1] 0
# calculate the mean value of all the cell values in the first layer
cellStats(veg_data[[1]], "mean")
## [1] 0.00035239
Question: What part of the study area is the 13th vegetation class found in (hint: make a map)? For instance, is it in the south-eastern part of the study area?
Answer: The 13th vegetation class is found in the eastern part of the studio area with the most density in the north-eastern part of the study area.
# plot a map of the 13th vegetation class
plot(veg_data[[13]])
Question: What proportion of cells contain the 12th vegetation class?
veg_data_freq12 <- freq(veg_data[[12]])
Answer: Of the total cells in the raster (53464), 819 cells contain the 12th vegetation class, making the proportion of the cells that contain the vegetation class 1.53%.
Question: Which vegetation class is the most abundant (i.e. present in the greatest number of cells)?
prop_df <- data.frame()
for (i in 1:nlayers(veg_data)){
prop <- freq(veg_data[[i]])[[2, 2]] / ncell(veg_data)
prop_df <- rbind(prop_df, prop)
}
prop_df <- rownames_to_column(prop_df) %>%
rename(proportion = X0.000317970971120754)
prop_highest <- prop_df %>% filter(proportion == max(proportion))
Answer: The vegetation class that is most abundant is 12.
# create `prioritizr` problem with only the data
p0 <- problem(pu_data, veg_data, cost_column = "cost")
# print empty problem,
# we can see that only the cost and feature data are defined
print(p0)
# calculate amount of each feature in each planning unit
abundance_data <- feature_abundances(p0)
# print abundance data
print(abundance_data)
## # A tibble: 32 × 3
## feature absolute_abundance relative_abundance
## <chr> <dbl> <dbl>
## 1 vegetation.1 16.0 1
## 2 vegetation.2 14.3 1
## 3 vegetation.3 10.4 1
## 4 vegetation.4 17.8 1
## 5 vegetation.5 13.0 1
## 6 vegetation.6 14.3 1
## 7 vegetation.7 20.0 1
## 8 vegetation.8 14.0 1
## 9 vegetation.9 18.0 1
## 10 vegetation.10 20.0 1
## # … with 22 more rows
# note that only the first ten rows are printed,
# this is because the abundance_data object is a tibble (i.e. tbl_df) object
# and not a standard data.frame object
print(class(abundance_data))
## [1] "tbl_df" "tbl" "data.frame"
# we can print all of the rows in abundance_data like this
print(abundance_data, n = Inf)
## # A tibble: 32 × 3
## feature absolute_abundance relative_abundance
## <chr> <dbl> <dbl>
## 1 vegetation.1 16.0 1
## 2 vegetation.2 14.3 1
## 3 vegetation.3 10.4 1
## 4 vegetation.4 17.8 1
## 5 vegetation.5 13.0 1
## 6 vegetation.6 14.3 1
## 7 vegetation.7 20.0 1
## 8 vegetation.8 14.0 1
## 9 vegetation.9 18.0 1
## 10 vegetation.10 20.0 1
## 11 vegetation.11 23.6 1
## 12 vegetation.12 748. 1
## 13 vegetation.13 126. 1
## 14 vegetation.14 10.5 1
## 15 vegetation.15 17.5 1
## 16 vegetation.16 15.0 1
## 17 vegetation.17 213. 1
## 18 vegetation.18 14.3 1
## 19 vegetation.19 17.1 1
## 20 vegetation.20 21.4 1
## 21 vegetation.21 18.6 1
## 22 vegetation.22 297. 1
## 23 vegetation.23 20.3 1
## 24 vegetation.24 165. 1
## 25 vegetation.25 716. 1
## 26 vegetation.26 24.0 1
## 27 vegetation.27 18.8 1
## 28 vegetation.28 17.5 1
## 29 vegetation.29 24.7 1
## 30 vegetation.30 59.0 1
## 31 vegetation.31 60.0 1
## 32 vegetation.32 32 1
# add new column with feature abundances in km^2
abundance_data$absolute_abundance_km2 <-
(abundance_data$absolute_abundance * prod(res(veg_data))) %>%
set_units(m^2) %>%
set_units(km^2)
# print abundance data
print(abundance_data)
## # A tibble: 32 × 4
## feature absolute_abundance relative_abundance absolute_abundance_km2
## <chr> <dbl> <dbl> [km^2]
## 1 vegetation.1 16.0 1 15.8
## 2 vegetation.2 14.3 1 14.1
## 3 vegetation.3 10.4 1 10.2
## 4 vegetation.4 17.8 1 17.6
## 5 vegetation.5 13.0 1 12.8
## 6 vegetation.6 14.3 1 14.1
## 7 vegetation.7 20.0 1 19.7
## 8 vegetation.8 14.0 1 13.9
## 9 vegetation.9 18.0 1 17.8
## 10 vegetation.10 20.0 1 19.7
## # … with 22 more rows
# calculate the average abundance of the features
mean(abundance_data$absolute_abundance_km2)
## 86.82948 [km^2]
# plot histogram of the features' abundances
hist(abundance_data$absolute_abundance_km2, main = "Feature abundances")
# find the abundance of the feature with the largest abundance
max(abundance_data$absolute_abundance_km2)
## 737.982 [km^2]
# find the name of the feature with the largest abundance
abundance_data$feature[which.max(abundance_data$absolute_abundance_km2)]
## [1] "vegetation.12"
Question: What is the median abundance of the features (hint: median)?
Answer: The median abundance from the abundance dataset is 19.1165038783561
Question: What is the name of the feature with smallest abundance?
Answer:
Question: How many features have a total abundance greater than 100 km^2 (hint: use sum(abundance_data$absolute_abundance_km2 > set_units(threshold, km^2) with the correct threshold value)?
Answer:
# create column in planning unit data with binary values (zeros and ones)
# indicating if a planning unit is covered by protected areas or not
pu_data$pa_status <- as.numeric(pu_data$locked_in)
# calculate feature representation by protected areas
repr_data <- eval_feature_representation_summary(p0, pu_data[, "pa_status"])
# print feature representation data
print(repr_data)
## # A tibble: 32 × 5
## summary feature total_amount absolute_held relative_held
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 overall vegetation.1 16.0 0 0
## 2 overall vegetation.2 14.3 0 0
## 3 overall vegetation.3 10.4 0 0
## 4 overall vegetation.4 17.8 0 0
## 5 overall vegetation.5 13.0 0 0
## 6 overall vegetation.6 14.3 0 0
## 7 overall vegetation.7 20.0 0 0
## 8 overall vegetation.8 14.0 0 0
## 9 overall vegetation.9 18.0 0.846 0.0470
## 10 overall vegetation.10 20.0 0 0
## # … with 22 more rows
# add new column with the areas represented in km^2
repr_data$absolute_held_km2 <-
(repr_data$absolute_held * prod(res(veg_data))) %>%
set_units(m^2) %>%
set_units(km^2)
# print representation data
print(repr_data)
## # A tibble: 32 × 6
## summary feature total_amount absolute_held relative_held absolute_held_k…
## <chr> <chr> <dbl> <dbl> <dbl> [km^2]
## 1 overall vegetation.1 16.0 0 0 0
## 2 overall vegetation.2 14.3 0 0 0
## 3 overall vegetation.3 10.4 0 0 0
## 4 overall vegetation.4 17.8 0 0 0
## 5 overall vegetation.5 13.0 0 0 0
## 6 overall vegetation.6 14.3 0 0 0
## 7 overall vegetation.7 20.0 0 0 0
## 8 overall vegetation.8 14.0 0 0 0
## 9 overall vegetation.9 18.0 0.846 0.0470 0.834
## 10 overall vegetation.10 20.0 0 0 0
## # … with 22 more rows
Question: What is the average proportion of the features held in protected areas (hint: use mean(table$relative_held) with the correct table name)?
Answer:
Question: If we set a target of 10% coverage by protected areas, how many features fail to meet this target (hint: use sum(table$relative_held >= target_value) with the correct table name)?
Answer:
Question: If we set a target of 20% coverage by protected areas, how many features fail to meet this target?
Answer:
Question: Is there a relationship between the total abundance of a feature and how well it is represented by protected areas (hint: plot(abundance_data$absolute_abundance ~ repr_data$relative_held))?
Answer:
# print planning unit data
print(pu_data)
## class : SpatialPolygonsDataFrame
## features : 516
## extent : 348703.2, 611932.4, 5167775, 5344516 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## variables : 6
## names : id, cost, status, locked_in, locked_out, pa_status
## min values : 557, 3.59717531470679, 0, 0, 0, 0
## max values : 1130, 47.238336402701, 2, 1, 1, 1
# make prioritization problem
p1_rds <- file.path(dir_data, "p1.rds")
if (!file.exists(p1_rds)){
p1 <- problem(pu_data, veg_data, cost_column = "cost") %>%
add_min_set_objective() %>%
add_relative_targets(0.05) %>% # 5% representation targets
add_binary_decisions() %>%
add_lpsymphony_solver()
saveRDS(p1, p1_rds)
}
p1 <- readRDS(p1_rds)
# print problem
print(p1)
# solve problem
s1 <- solve(p1)
# print solution, the solution_1 column contains the solution values
# indicating if a planning unit is (1) selected or (0) not
print(s1)
## class : SpatialPolygonsDataFrame
## features : 516
## extent : 348703.2, 611932.4, 5167775, 5344516 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## variables : 7
## names : id, cost, status, locked_in, locked_out, pa_status, solution_1
## min values : 557, 3.59717531470679, 0, 0, 0, 0, 0
## max values : 1130, 47.238336402701, 2, 1, 1, 1, 1
# calculate number of planning units selected in the prioritization
eval_n_summary(p1, s1[, "solution_1"])
## # A tibble: 1 × 2
## summary cost
## <chr> <dbl>
## 1 overall 15
# calculate total cost of the prioritization
eval_cost_summary(p1, s1[, "solution_1"])
## # A tibble: 1 × 2
## summary cost
## <chr> <dbl>
## 1 overall 385.
# plot solution
# selected = green, not selected = grey
spplot(s1, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s1",
colorkey = FALSE)
Question: How many planing units were selected in the prioritization? What proportion of planning units were selected in the prioritization?
Answer:
Question: Is there a pattern in the spatial distribution of the priority areas?
Answer:
Question: Can you verify that all of the targets were met in the prioritization (hint: eval_feature_representation_summary(p1, s1[, "solution_1"]))?
Answer:
# plot locked_in data
# TRUE = blue, FALSE = grey
spplot(pu_data, "locked_in", col.regions = c("grey80", "darkblue"),
main = "locked_in", colorkey = FALSE)
# make prioritization problem
p2_rds <- file.path(dir_data, "p2.rds")
if (!file.exists(p2_rds)){
p2 <- problem(pu_data, veg_data, cost_column = "cost") %>%
add_min_set_objective() %>%
add_relative_targets(0.05) %>%
add_locked_in_constraints("locked_in") %>%
add_binary_decisions() %>%
add_lpsymphony_solver()
saveRDS(p2, p2_rds)
}
p2 <- readRDS(p2_rds)
# print problem
print(p2)
# solve problem
s2 <- solve(p2)
# plot solution
# selected = green, not selected = grey
spplot(s2, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s2",
colorkey = FALSE)
Let’s pretend that we talked to an expert on the vegetation communities in our study system and they recommended that a 10% target was needed for each vegetation class. So, equipped with this information, let’s set the targets to 10%.
# make prioritization problem
p3_rds <- file.path(dir_data, "p3.rds")
if (!file.exists(p3_rds)){
p3 <- problem(pu_data, veg_data, cost_column = "cost") %>%
add_min_set_objective() %>%
add_relative_targets(0.1) %>%
add_locked_in_constraints("locked_in") %>%
add_binary_decisions() %>%
add_lpsymphony_solver()
saveRDS(p3, p3_rds)
}
p3 <- readRDS(p3_rds)
# print problem
print(p3)
# solve problem
s3 <- solve(p3)
# plot solution
# selected = green, not selected = grey
spplot(s3, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s3",
colorkey = FALSE)
Next, let’s lock out highly degraded areas. Similar to before, this information is present in our planning unit data so we can use the locked_out column name to achieve this.
# plot locked_out data
# TRUE = red, FALSE = grey
spplot(pu_data, "locked_out", col.regions = c("grey80", "darkred"),
main = "locked_out", colorkey = FALSE)
# make prioritization problem
p4_rds <- file.path(dir_data, "p4.rds")
if (!file.exists(p4_rds)){
p4 <- problem(pu_data, veg_data, cost_column = "cost") %>%
add_min_set_objective() %>%
add_relative_targets(0.1) %>%
add_locked_in_constraints("locked_in") %>%
add_locked_out_constraints("locked_out") %>%
add_binary_decisions() %>%
add_lpsymphony_solver()
saveRDS(p4, p4_rds)
}
p4 <- readRDS(p4_rds)
# print problem
print(p4)
# solve problem
s4 <- solve(p4)
# plot solution
# selected = green, not selected = grey
spplot(s4, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s4",
colorkey = FALSE)
Question: What is the cost of the planning units selected in s2, s3, and s4?
Answer:
Question: How many planning units are in s2, s3, and s4?
Answer:
Question: Do the solutions with more planning units have a greater cost? Why (or why not)?
Answer:
Question: Why does the first solution (s1) cost less than the second solution with protected areas locked into the solution (s2)?
Answer:
Question: Why does the third solution (s3) cost less than the fourth solution solution with highly degraded areas locked out (s4)?
Answer:
# make prioritization problem
p5_rds <- file.path(dir_data, "p5.rds")
if (!file.exists(p5_rds)){
p5 <- problem(pu_data, veg_data, cost_column = "cost") %>%
add_min_set_objective() %>%
add_boundary_penalties(penalty = 0.001) %>%
add_relative_targets(0.1) %>%
add_locked_in_constraints("locked_in") %>%
add_locked_out_constraints("locked_out") %>%
add_binary_decisions() %>%
add_lpsymphony_solver()
saveRDS(p5, p5_rds)
}
p5 <- readRDS(p5_rds)
# print problem
print(p5)
# solve problem,
# note this will take a bit longer than the previous runs
s5 <- solve(p5)
# print solution
print(s5)
## class : SpatialPolygonsDataFrame
## features : 516
## extent : 348703.2, 611932.4, 5167775, 5344516 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## variables : 7
## names : id, cost, status, locked_in, locked_out, pa_status, solution_1
## min values : 557, 3.59717531470679, 0, 0, 0, 0, 0
## max values : 1130, 47.238336402701, 2, 1, 1, 1, 1
# plot solution
# selected = green, not selected = grey
spplot(s5, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s5",
colorkey = FALSE)
Question: What is the cost the fourth (s4) and fifth (s5) solutions? Why does the fifth solution (s5) cost more than the fourth (s4) solution?
Answer:
Question: Try setting the penalty value to 0.000000001 (i.e. 1e-9) instead of 0.001. What is the cost of the solution now? Is it different from the fourth solution (s4) (hint: try plotting the solutions to visualize them)? Is this is a useful penalty value? Why (or why not)?
Answer:
Question:
Answer: Try setting the penalty value to 0.5. What is the cost of the solution now? Is it different from the fourth solution (s4) (hint: try plotting the solutions to visualize them)? Is this a useful penalty value? Why (or why not)?